# Fetching IPs from the Xici proxy site - Jianshu https://www.jianshu.com/p/22390e0b20b7
# Not usable!

# coding=utf8
import requests
from bs4 import BeautifulSoup
import re
import os.path

# Browser-like User-Agent string used when requesting the proxy listing page.
user_agent = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_5)'
# HTTP request headers carrying that User-Agent.
headers = {}
headers['User-Agent'] = user_agent


def getListProxies(limit=10, timeout=10):
    """Scrape proxy addresses from the xicidaili.com listing page.

    Fetches ``http://www.xicidaili.com/nn`` with the module-level
    ``headers``, parses the HTML and builds one proxy mapping per usable
    table row from the text of its second and third ``<td>`` cells, joined
    as ``"<cell 1>:<cell 2>"`` (presumably host and port -- confirm against
    the page layout).

    Args:
        limit: Maximum number of proxies to return. Defaults to 10, the
            value that used to be hard-coded.
        timeout: Seconds to wait for the listing page, so an unresponsive
            site cannot block the caller indefinitely.

    Returns:
        A list of dicts of the form ``{'http': addr, 'https': addr}``. The
        list is empty when no row could be parsed. The proxies are NOT
        checked for reachability.

    Raises:
        requests.RequestException: If the listing page cannot be fetched
            (including when the timeout expires).
    """
    # Use the session as a context manager so its connections are released.
    with requests.Session() as session:
        page = session.get("http://www.xicidaili.com/nn", headers=headers,
                           timeout=timeout)
        soup = BeautifulSoup(page.text, 'lxml')

    proxyList = []
    # The empty alternative in the pattern matches any string, so rows with
    # class "odd" and rows carrying any other class value are both selected.
    rows = soup.find_all('tr', attrs={'class': re.compile("(odd)|()")})
    for row in rows:
        # Stop once the requested number of proxies has been collected.
        if len(proxyList) >= limit:
            break

        cells = row.find_all('td')
        # A row with fewer than three <td> cells cannot supply both values;
        # skip it rather than failing with IndexError.
        if len(cells) < 3:
            continue

        # get_text() always returns a str, whereas .string is None for a cell
        # with several children and would break the concatenation below.
        host = cells[1].get_text(strip=True)
        port = cells[2].get_text(strip=True)
        if not host or not port:
            continue

        address = host + ':' + port
        proxyList.append({'http': address, 'https': address})

    return proxyList


# Fetch the proxy list once when the script runs, then print how many
# entries were collected, followed by the entries themselves.
res = getListProxies()
print(len(res), res, sep='\n')